{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 7 层级索引（hierarchical indexing）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cloth  size\n",
      "a      0      -0.220064\n",
      "       1      -1.066040\n",
      "       2       0.401037\n",
      "b      0       0.617559\n",
      "       1      -0.061682\n",
      "       2       1.613820\n",
      "c      0      -0.244883\n",
      "       1      -0.005319\n",
      "       2      -0.866422\n",
      "d      0       0.816196\n",
      "       1      -0.458512\n",
      "       2      -0.196433\n",
      "dtype: float64\n",
      "<class 'pandas.core.series.Series'>\n",
      "<class 'pandas.core.indexes.multi.MultiIndex'>\n",
      "MultiIndex([('a', 0),\n",
      "            ('a', 1),\n",
      "            ('a', 2),\n",
      "            ('b', 0),\n",
      "            ('b', 1),\n",
      "            ('b', 2),\n",
      "            ('c', 0),\n",
      "            ('c', 1),\n",
      "            ('c', 2),\n",
      "            ('d', 0),\n",
      "            ('d', 1),\n",
      "            ('d', 2)],\n",
      "           names=['cloth', 'size'])\n",
      "[['a', 'b', 'c', 'd'], [0, 1, 2]]\n"
     ]
    },
    {
     "data": {
      "text/plain": "FrozenList([[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]])"
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "#MultiIndex是层级索引，索引类型的一种\n",
    "index1 = pd.MultiIndex.from_arrays([['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd'],\n",
    "                [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]], names=['cloth', 'size'])\n",
    "\n",
    "ser_obj = pd.Series(np.random.randn(12),index=index1)\n",
    "print(ser_obj)\n",
    "print(type(ser_obj)) #Series\n",
    "print(type(ser_obj.index)) #索引类型，MultiIndex\n",
    "print(ser_obj.index)\n",
    "print(ser_obj.index.levels) #层级索引的索引值\n",
    "ser_obj.index.codes  #没那么重要，代表索引的位置\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T03:05:31.884834300Z",
     "start_time": "2024-07-06T03:05:31.184349800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "data": {
      "text/plain": "cloth  size\na      0      -0.220064\n       1      -1.066040\n       2       0.401037\nb      0       0.617559\n       1      -0.061682\n       2       1.613820\nc      0      -0.244883\n       1      -0.005319\n       2      -0.866422\nd      0       0.816196\n       1      -0.458512\n       2      -0.196433\ndtype: float64"
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser_obj"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-06T03:06:45.172354800Z",
     "start_time": "2024-07-06T03:06:45.164890200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n",
      "size\n",
      "0   -0.244883\n",
      "1   -0.005319\n",
      "2   -0.866422\n",
      "dtype: float64\n",
      "--------------------------------------------------\n",
      "0.4010371494446873\n",
      "--------------------------------------------------\n",
      "cloth\n",
      "a    0.401037\n",
      "b    1.613820\n",
      "c   -0.866422\n",
      "d   -0.196433\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "#层级索引如何取数据\n",
    "print('-'*50)\n",
    "print(ser_obj['c']) #取出c的所有数据，取出的是series\n",
    "print('-'*50)\n",
    "print(ser_obj['a', 2])\n",
    "print('-'*50)\n",
    "print(ser_obj[:, 2]) #取出所有行的内层索引为2的数据"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T03:07:43.084712800Z",
     "start_time": "2024-07-06T03:07:43.077506500Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 交换层级（数据分析用）"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "size  cloth\n",
      "0     a       -0.220064\n",
      "1     a       -1.066040\n",
      "2     a        0.401037\n",
      "0     b        0.617559\n",
      "1     b       -0.061682\n",
      "2     b        1.613820\n",
      "0     c       -0.244883\n",
      "1     c       -0.005319\n",
      "2     c       -0.866422\n",
      "0     d        0.816196\n",
      "1     d       -0.458512\n",
      "2     d       -0.196433\n",
      "dtype: float64\n",
      "--------------------------------------------------\n",
      "cloth  size\n",
      "a      0      -0.220064\n",
      "       1      -1.066040\n",
      "       2       0.401037\n",
      "b      0       0.617559\n",
      "       1      -0.061682\n",
      "       2       1.613820\n",
      "c      0      -0.244883\n",
      "       1      -0.005319\n",
      "       2      -0.866422\n",
      "d      0       0.816196\n",
      "       1      -0.458512\n",
      "       2      -0.196433\n",
      "dtype: float64\n",
      "--------------------------------------------------\n",
      "size  cloth\n",
      "0     a       -0.220064\n",
      "1     a       -1.066040\n",
      "2     a        0.401037\n",
      "0     b        0.617559\n",
      "1     b       -0.061682\n",
      "2     b        1.613820\n",
      "0     c       -0.244883\n",
      "1     c       -0.005319\n",
      "2     c       -0.866422\n",
      "0     d        0.816196\n",
      "1     d       -0.458512\n",
      "2     d       -0.196433\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(ser_obj.swaplevel())\n",
    "print('-'*50)\n",
    "print(ser_obj)\n",
    "print('-'*50)\n",
    "ser_obj=ser_obj.swaplevel()\n",
    "print(ser_obj)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T03:09:05.610692Z",
     "start_time": "2024-07-06T03:09:05.589784600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "size  cloth\n",
      "0     a       -0.220064\n",
      "      b        0.617559\n",
      "      c       -0.244883\n",
      "      d        0.816196\n",
      "1     a       -1.066040\n",
      "      b       -0.061682\n",
      "      c       -0.005319\n",
      "      d       -0.458512\n",
      "2     a        0.401037\n",
      "      b        1.613820\n",
      "      c       -0.866422\n",
      "      d       -0.196433\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(ser_obj.sort_index(level=0))  #层级索引按那个索引级别排序,level=0表示按最外层索引排序"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T03:09:13.335761Z",
     "start_time": "2024-07-06T03:09:13.326766400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cloth         a         b         c         d\n",
      "size                                         \n",
      "0     -0.220064  0.617559 -0.244883  0.816196\n",
      "1     -1.066040 -0.061682 -0.005319 -0.458512\n",
      "2      0.401037  1.613820 -0.866422 -0.196433\n"
     ]
    }
   ],
   "source": [
    "#把0索引（最外层索引）变为列索引\n",
    "df_obj=ser_obj.unstack(1)  #unstack可以放索引名，或者索引位置\n",
    "print(df_obj)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T03:10:58.855280800Z",
     "start_time": "2024-07-06T03:10:58.836291100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cloth         a         b         c         d\n",
      "size                                         \n",
      "0     -0.220064  0.617559 -0.244883  0.816196\n",
      "1     -1.066040 -0.061682 -0.005319 -0.458512\n",
      "2      0.401037  1.613820 -0.866422 -0.196433\n",
      "size  cloth\n",
      "0     a       -0.220064\n",
      "      b        0.617559\n",
      "      c       -0.244883\n",
      "      d        0.816196\n",
      "1     a       -1.066040\n",
      "      b       -0.061682\n",
      "      c       -0.005319\n",
      "      d       -0.458512\n",
      "2     a        0.401037\n",
      "      b        1.613820\n",
      "      c       -0.866422\n",
      "      d       -0.196433\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(df_obj)\n",
    "#对df进行stack，就会把行，列索引进行堆叠，变为series\n",
    "#把列索引放入内层,只能放到内层\n",
    "print(df_obj.stack())  #stack变为series和unstack保持一致的\n",
    "# df_obj=df_obj.transpose()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T03:11:35.124444700Z",
     "start_time": "2024-07-06T03:11:35.112642200Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
